Load the libraries

library(tidyverse)
library(janitor)
library(naniar)
library(ggmap)
library(shiny)
library("tidyverse")
library("naniar")
library("janitor")
library("stringr")
library(tidyverse)
library(janitor)
library(naniar)
library(tidyverse)
library(skimr)
library(janitor)
library(palmerpenguins)
library(gtools)
library(RColorBrewer)
library(paletteer)
library(ggthemes)
options(scipen = 999)

Load the Data

locations <- read.csv("data/Health_Facility_General_Information.csv") %>% 
  clean_names()

cardiac <- read_csv("data/cardiac-surgery.csv") %>% 
  clean_names()
## Rows: 1609 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Hospital Name, Detailed Region, Region, Procedure, Year of Hospital...
## dbl (8): Facility ID, Number of Cases, Number of Deaths, Observed Mortality ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
locations <- locations %>% 
    group_by(facility_id) %>% 
    summarise(across(everything(), first)) 
cardiac <- left_join(cardiac, locations, by="facility_id")

Have to separate the data, as some of them are over a range of a couple of years, some of them are year by year

cardiac <- cardiac %>% 
  filter(str_detect(year_of_hospital_discharge, "-", negate = TRUE))

Map Data:

register_stadiamaps("e2de824a-0995-4a51-915c-33c14c061e7b", write = FALSE) 
cardiac %>%  #GET MAX LAT AND LON FROM CODE BELOW!!! EASY MAKES SENSE
  select(facility_longitude, facility_latitude) %>% 
  summary()
##  facility_longitude facility_latitude
##  Min.   :-78.87     Min.   :40.58    
##  1st Qu.:-74.92     1st Qu.:40.74    
##  Median :-73.95     Median :40.88    
##  Mean   :-74.63     Mean   :41.61    
##  3rd Qu.:-73.80     3rd Qu.:42.73    
##  Max.   :-72.98     Max.   :44.70    
##  NA's   :53         NA's   :53
lat <- c(40.58, 44.70) #we got tehse values from summary above
long <- c(-78.87, -72.98)
bbox <- make_bbox(long, lat, f = 0.03)

maptotal <- get_stadiamap(bbox, maptype = "stamen_terrain", zoom=7) #Make ur basemap, there are other styles which you could find from this link, https://docs.stadiamaps.com/themes/
## ℹ © Stadia Maps © Stamen Design © OpenMapTiles © OpenStreetMap contributors.
x <- cardiac %>% 
  arrange(hospital_name) %>% 
  group_by(facility_id, hospital_name, facility_latitude, facility_longitude) %>% 
  summarise(number_of_cases=sum(number_of_cases), average_moratality_rate=mean(observed_mortality_rate), .groups = 'keep') %>% 
  mutate(total_deaths=average_moratality_rate/100*number_of_cases) %>% 
  mutate(facility_latitude=as.numeric(facility_latitude), facility_longitude=as.numeric(facility_longitude))
lat1 <- c(40.58, 41.2) #we got tehse values from summary above
long1 <- c(-74.5, -72.98)
bbox1 <- make_bbox(long1, lat1, f = 0.03)

mapnyc <- get_stadiamap(bbox1, maptype = "stamen_terrain", zoom=8) 
## ℹ © Stadia Maps © Stamen Design © OpenMapTiles © OpenStreetMap contributors.

Cleaning the Data

cardiac %>%
  mutate(across(everything(), tolower)) %>% 
  mutate(across(everything(), ~ str_replace_all((.), " ", "_"))) %>% 
  mutate(across(everything(), ~ str_replace_all((.), "__", "_"))) %>% 
  mutate(across(-"year_of_hospital_discharge", ~ str_remove((.), "-"))) %>% 
  mutate(across(number_of_cases:risk_adjusted_mortality_rate, as.numeric))
## # A tibble: 968 × 49
##    facility_id hospital_name               detailed_region      region procedure
##    <chr>       <chr>                       <chr>                <chr>  <chr>    
##  1 1           albany_med._ctr             capital_district     capit… all_pci  
##  2 116         arnot_ogden_med_ctr         western_ny__rochest… weste… all_pci  
##  3 746         bassett_medical_center      capital_district     capit… all_pci  
##  4 1438        bellevue_hospital_ctr       manhattan            ny_me… all_pci  
##  5 1178        bronxlebanon-concourse      bronx                ny_me… all_pci  
##  6 1286        brookdale_univ_hosp_med_ctr kings                ny_me… all_pci  
##  7 885         brookhaven_memorial         ny_metro__long_isla… ny_me… all_pci  
##  8 207         buffalo_general_med_ctr     western_ny__buffalo  weste… all_pci  
##  9 977         cayuga_med_ctr_ithaca       central_ny           centr… all_pci  
## 10 636         crouse_hospital             central_ny           centr… all_pci  
## # ℹ 958 more rows
## # ℹ 44 more variables: year_of_hospital_discharge <chr>, number_of_cases <dbl>,
## #   number_of_deaths <dbl>, observed_mortality_rate <dbl>,
## #   expected_mortality_rate <dbl>, risk_adjusted_mortality_rate <dbl>,
## #   lower_limit_of_confidence_interval <chr>,
## #   upper_limit_of_confidence_interval <chr>, comparison_results <chr>,
## #   facility_name <chr>, short_description <chr>, description <chr>, …

Plots

What is the procedure with the highest observed mortality rate?

cardiac %>% 
  group_by(procedure) %>% 
  summarize(avg_mortality = mean(observed_mortality_rate)) %>% 
  arrange(-avg_mortality)
## # A tibble: 3 × 2
##   procedure         avg_mortality
##   <chr>                     <dbl>
## 1 CABG                      1.52 
## 2 All PCI                   1.22 
## 3 Non-Emergency PCI         0.720
cardiac %>% 
  group_by(procedure) %>% 
  summarize(avg_mortality = mean(observed_mortality_rate)) %>% 
  ggplot(aes(x = reorder(procedure, avg_mortality), y = avg_mortality))+
  geom_col(color = "black", fill = "lightblue")+
  coord_flip()+
  labs(title = "Average Observed Mortality Rate by Procedure",
       x = "Procedure",
       y = "Mortality Rate")+
  theme_light(base_size = 12)

How has the mortality rate changed for cardiac surgery over time in each region?

#I'll come back to this later if I figure out the year stuff but feel free to take a crack at it :)
cardiac %>%
  group_by(year_of_hospital_discharge) %>% 
  summarize(observed_mortality_rate=mean(observed_mortality_rate)) %>% 
  ggplot(aes(x =year_of_hospital_discharge, y = observed_mortality_rate))+
  geom_col(color = "purple4", na.rm = T) + geom_smooth(method = lm, se= TRUE) + 
  labs(title = "Regional Cardiac Surgery Morality Rates",
       x = "Year of Hospital Discharge",
       y = "Mean Observed Mortality Rate")
## `geom_smooth()` using formula = 'y ~ x'

cardiac %>% 
  filter(hospital_name == "albany_med._ctr") %>% 
  group_by(year_of_hospital_discharge, hospital_name) %>% 
  summarize(avg_mortality = mean(observed_mortality_rate), .groups = 'keep') %>% 
  ggplot(aes(x = as.factor(year_of_hospital_discharge), y = avg_mortality, group = hospital_name))+
  geom_line()+
  geom_point()

How does the expected mortality rate compare to the observed mortality rate?

#I don't know which graph to plot for this one but if you can figure it out that would be great. I was thinking maybe scatter but I don't know for sure

#Expected mortality vs observed mortality

cardiac %>%
  ggplot(aes(x = expected_mortality_rate, y = observed_mortality_rate))+
  geom_point(color = "steelblue") + geom_smooth(method = lm, se= TRUE) + 
  labs(title = "Expected vs Observed Mortality Rate",
       x = "Expected Mortality",
       y = "Observed Mortality")
## `geom_smooth()` using formula = 'y ~ x'

Which region has the highest mortality rate?

cardiac %>% 
  group_by(region) %>% 
  summarize(avg_mortality = mean(observed_mortality_rate)) %>% 
  arrange(-avg_mortality)
## # A tibble: 8 × 2
##   region                  avg_mortality
##   <chr>                           <dbl>
## 1 Central NY                      1.41 
## 2 Western NY - Rochester          1.26 
## 3 NY Metro - New Rochelle         1.12 
## 4 Capital District                1.10 
## 5 NY Metro - NYC                  1.09 
## 6 New York State                  1.07 
## 7 NY Metro - Long Island          0.935
## 8 Western NY - Buffalo            0.911
cardiac %>% 
  group_by(region) %>% 
  summarize(avg_mortality = mean(observed_mortality_rate)) %>% 
  ggplot(aes(x = reorder(region, avg_mortality), y = avg_mortality))+
  geom_col(color = "black", fill = "lightpink")+
  coord_flip()+
  labs(title = "Average Observed Mortality Rate per Region",
       x = "Region",
       y = "Mortality Rate")+
  theme_light(base_size = 12)

COMPARING NUMBER OF TOTAL PROCEDURES WITH AVERAGE MORTALITY RATE

total_cases <- cardiac %>% 
  group_by(facility_id) %>% 
  summarize(total_procedures = sum(number_of_cases), avg_mortality = mean(observed_mortality_rate)) %>%
  ggplot(aes(x = total_procedures, y = avg_mortality))+
  geom_point(color = "olivedrab") +
  labs(title = "Average Mortality Rate compared with Procedure Numbers ",
       x = "Total Number of Procedure",
       y = "Mortality Rate")+
  theme(axis.text.y = element_text(angle = 0, hjust = 1, size = 5))+
  scale_x_log10()+geom_smooth(method=lm, se=F)
total_cases
## `geom_smooth()` using formula = 'y ~ x'

Map:

#Overall deaths for all years:

#All hospital map
ggmap(maptotal) + 
  geom_point(data = x, aes(facility_longitude, facility_latitude)) + labs(x= "Longitude", y= "Latitude", title="Cardiac Locations") 
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

#For number of cases
ggmap(maptotal) + 
  geom_point(data = x, aes(facility_longitude, facility_latitude, colour  = number_of_cases)) + #WE just add the chart in without using ggplot! 
  labs(x= "Longitude", y= "Latitude", title="Cardiac Locations") 
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

#For mortality rate

ggmap(maptotal) + 
  geom_point(data = x, aes(facility_longitude, facility_latitude, colour  = average_moratality_rate)) + #WE just add the chart in without using ggplot! 
  labs(x= "Longitude", y= "Latitude", title="Cardiac Locations") 
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

#For total deaths
ggmap(maptotal) + 
  geom_point(data = x, aes(facility_longitude, facility_latitude, colour  = total_deaths)) + #WE just add the chart in without using ggplot! 
  labs(x= "Longitude", y= "Latitude", title="Cardiac Locations") 
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

#NYC Rate of mortaility
ggmap(mapnyc) + 
  geom_point(data = x, aes(facility_longitude, facility_latitude, colour  = average_moratality_rate)) + 
  labs(x= "Longitude", y= "Latitude", title="Cardiac Locations") #colour??
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).

#NYC Total Cases
ggmap(mapnyc) + 
  geom_point(data = x, aes(facility_longitude, facility_latitude, colour  = number_of_cases)) + 
  labs(x= "Longitude", y= "Latitude", title="Cardiac Locations") #colour??
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).

#NYC total deaths
ggmap(mapnyc) + 
  geom_point(data = x, aes(facility_longitude, facility_latitude, colour  = total_deaths)) + 
  labs(x= "Longitude", y= "Latitude", title="Cardiac Locations") #colour??
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).

Shiny App

library(shinydashboard)
## 
## Attaching package: 'shinydashboard'
## The following object is masked from 'package:graphics':
## 
##     box
ui <- dashboardPage(skin = "purple",
                    
  dashboardHeader(title = "Cardiac Dashboard"),
  
  ## Sidebar content
  dashboardSidebar(
    sidebarMenu(
      
      menuItem("Mortality Rate", 
               tabName = "dashboard", 
               icon = icon("skull")),
      
      menuItem("Procedures", 
               tabName = "widgets", 
               icon = icon("stethoscope")),
      menuItem("Cases",
               tabName = "cases",
               icon = icon("folder-open")),
      menuItem("Maps",
               tabName = "map",
               icon = icon("earth-americas")),
      menuItem("NYC Maps",
               tabName = "nyc",
               icon = icon("hotel"))
    )
    ),
  
  ## Body content
  dashboardBody(
    
    tabItems(
  
      ## First tab content    
      tabItem(tabName = "dashboard", 
              h1(strong("These are plots produced based on the mortality rate observed in each New York hospital based on cardiac surgery results."), style = "font-size:20px"),
        fluidRow(
          box(plotOutput("plot1a", height = 250)), 
          box(title = "Controls", 
              selectInput("region", 
                          "Select Region of Interest:", 
                          choices=unique(cardiac$region))
          )
          ),
        fluidRow(
          box(plotOutput("plot1b", height = 250)),
          box(title = "Controls",
              selectInput("year",
                          "Select Year of Interest:",
                          choices = unique(cardiac$year_of_hospital_discharge)))
          ),
        fluidRow(
          box(plotOutput("plot1c", height = 250)),
          box(title = "Controls",
              selectInput("hospital",
                          "Select Hospital of Interest:",
                          choices = unique(cardiac$hospital_name)))
          )
          ),

      ## Second tab item 
      tabItem(tabName = "widgets",
              h1(strong("Number of patients broken down by procedure type or region in New York Hospitals"), style = "font-size:20px"),
        fluidRow(
          box(plotOutput("plot2", height = 250)), # box is a container for the plot
          box(title = "Controls", # box is a container for the controls
              radioButtons("x", 
               "Select Fill Variable", 
               choices=c("region", "procedure"),
               selected="region")
          )
          )
          ),
      
      ## Third tab item
      tabItem(tabName = "cases",
              h1(strong("These are plots produced based on the number of cases of cardiac surgery performed in New York Hospitals."), style = "font-size:20px"),
        fluidRow(
          box(plotOutput("plot3a", height = 250)), # box is a container for the plot
          box(title = "Controls", # box is a container for the controls
              selectInput("a", 
               "Select Hospital", 
               choices=c(unique(cardiac$hospital_name))
          )
          )
          ),
        fluidRow(
          box(plotOutput("plot3b", height = 250)), # box is a container for the plot
          box(title = "Controls", # box is a container for the controls
              selectInput("b", 
               "Select Year:", 
               choices=c(unique(cardiac$year_of_hospital_discharge))
          )
          )
          )
          ),
        
        tabItem(tabName = "map",
                h1(strong("These are overall maps addressing different scenarios based on our dataset."), style = "font-size:20px"),
          fluidRow(
            box(plotOutput("plot4a", height = 450)),
            box(plotOutput("plot4b", height = 450)),
            box(plotOutput("plot4c", height = 450)),
            box(plotOutput("plot4d", height = 450))
          )
          ),
      tabItem(tabName = "nyc",
              h1(strong("These are maps zoomed in on New York City addressing different scenarios based on our dataset."), style = "font-size:20px"),
              fluidRow(
            box(plotOutput("plot5a", height = 450)),
          
          ),
          fluidRow(
            box(plotOutput("plot5b", height = 450))
          ),
          fluidRow(
            box(plotOutput("plot5c", height = 450))
          ),
          )
          )
          )
  )
          
server <- function(input, output, session) {
  
  session$onSessionEnded(stopApp)

  output$plot1a <- renderPlot({

    cardiac %>%
      filter(region == input$region) %>%
      ggplot(aes(x = observed_mortality_rate))+ 
      geom_density(color = "black", fill = "steelblue", alpha = 0.6)+
      labs(title = "Observed Mortality Rate Distribution by Region",
           x = "Mortality Rate",
           y = "Proportion of NY Regions")
  })
  
  output$plot1b <- renderPlot({

    cardiac %>%
      filter(year_of_hospital_discharge == input$year) %>%
      ggplot(aes(x = observed_mortality_rate))+ 
      geom_density(color = "black", fill = "steelblue", alpha = 0.6)+
      labs(title = "Observed Mortality Rate Distribution by Year",
           x = "Mortality Rate",
           y = "Proportion of Hospitals")
  })
  
  output$plot1c <- renderPlot({

    cardiac %>% 
      filter(hospital_name == input$hospital) %>% 
      group_by(year_of_hospital_discharge, hospital_name) %>% 
      summarize(avg_mortality = mean(observed_mortality_rate), .groups = 'keep') %>% 
      ggplot(aes(x = as.factor(year_of_hospital_discharge), y = avg_mortality, group = hospital_name))+
      geom_line()+
      geom_point()+
      labs(title = "Mortality Rate Over Time",
           x = "Year",
           y = "Mortality Rate")
  })
  
  
    output$plot2 <- renderPlot({
    
    cardiac %>% 
      ggplot(aes_string(x="region", fill=input$x))+
      geom_bar(position="dodge", alpha=0.8, color="black")+
      labs(x=NULL, y=NULL, fill="Fill Variable")+theme(axis.text.x = element_text(angle=90,hjust = 1))
    
  })
    
    output$plot3a <- renderPlot({
    
    cardiac %>% 
      filter(hospital_name == input$a) %>% 
      group_by(year_of_hospital_discharge) %>% 
      summarize(total_cases_a = sum(number_of_cases)) %>% 
      ggplot(aes_string(x="year_of_hospital_discharge", y = "total_cases_a"))+
      geom_col(alpha=0.8, color="black", fill = "steelblue")+
      labs(title = "Number of Cases per Year",
           x= "Year", 
           y= "Number of Cases")+
        theme_light(base_size = 14)
    
  })
    
    
  output$plot3b <- renderPlot({
    
    cardiac %>% 
      filter(year_of_hospital_discharge == input$b) %>% 
      group_by(facility_id, procedure) %>% 
      summarize(total_procedures = sum(number_of_cases), avg_mortality = mean(observed_mortality_rate)) %>%
      ggplot(aes(x = total_procedures, y = avg_mortality))+
      geom_point(color = "olivedrab") +
      labs(title = "Average Mortality Rate compared with Procedure Numbers ",
       x = "Total Number of Procedures",
       y = "Mortality Rate")+
     theme(axis.text.y = element_text(angle = 0, hjust = 1, size = 5))+
     theme_classic()+
     scale_x_log10()+
     geom_smooth(method=lm, se=F)
    
  })
  
  output$plot4a <- renderPlot({
    
      ggmap(maptotal) + 
  geom_point(data = x, aes(facility_longitude, facility_latitude, size = 2)) + labs(x= "Longitude", y= "Latitude", title="Cardiac Locations")
    
  })
  
  output$plot4b <- renderPlot({
    
    ggmap(maptotal) + 
  geom_point(data = x, aes(facility_longitude, facility_latitude, colour  = number_of_cases, size = 2)) + #WE just add the chart in without using ggplot! 
  labs(x= "Longitude", y= "Latitude", title="Cardiac Locations") 
    
  })
  
  output$plot4c <- renderPlot({
    
    ggmap(maptotal) + 
  geom_point(data = x, aes(facility_longitude, facility_latitude, colour  = average_moratality_rate, size = 2)) + #WE just add the chart in without using ggplot! 
  labs(x= "Longitude", y= "Latitude", title="Cardiac Locations") 
    
  })
  
  output$plot4d <- renderPlot({
    
    ggmap(maptotal) + 
  geom_point(data = x, aes(facility_longitude, facility_latitude, colour  = total_deaths, size = 2)) + #WE just add the chart in without using ggplot! 
  labs(x= "Longitude", y= "Latitude", title="Cardiac Locations") 
    
  })
  
  output$plot5a <- renderPlot({
    
    ggmap(mapnyc) + 
  geom_point(data = x, aes(facility_longitude, facility_latitude, colour  = average_moratality_rate, size = 2.5)) + 
  labs(x= "Longitude", y= "Latitude", title="Cardiac Locations")
    
  })
  
  output$plot5b <- renderPlot({
    
    ggmap(mapnyc) + 
  geom_point(data = x, aes(facility_longitude, facility_latitude, colour  = number_of_cases, size = 2.5)) + 
  labs(x= "Longitude", y= "Latitude", title="Cardiac Locations")
    
  })
  
  output$plot5c <- renderPlot({
    
    ggmap(mapnyc) + 
  geom_point(data = x, aes(facility_longitude, facility_latitude, colour  = total_deaths, size = 2.5)) + 
  labs(x= "Longitude", y= "Latitude", title="Cardiac Locations")
    
  })
}

shinyApp(ui, server)
Shiny applications not supported in static R Markdown documents